Adaptive PDAC - Background

This is a study of compartment-dependent heterogeneity in pancreatic cancer. In conclusion, we can reveal that intratumor heterogeneity in many cases is related to the invasion front properties; Tumor invading stroma or tumor invading lobes - the acinar invasion.

31 cases had 6 immunohistochemistry markers quantified in QuPath, 4 related to the basal like subtype (CK/KRT17, CK/KRT5, CA125 and HMGA2) and 2 related to the classical subtype (Muc5 and CDX2, which also relates to intestinal expression). Not all stains were available for all cases, which is stated in the supplementary material.

For each case, up to 9 regions of interest (ROIs) for “tumor in septa” (=stroma) and 9 ROIs representing “tumor in acinar (=lobular) invasion”. The ROIs are overlapping across the different stains. All quantifications extracted are only including tumor cells. Data cleanup include removal of ev. annotations in the images that helped correct ROI setting.

library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
library(reactable)
library(ggthemes)
library(ggpubr)
## Loading required package: ggplot2
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
## 
##     dcast, melt
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(corrplot)
## corrplot 0.95 loaded
library(patchwork)
library(ggplot2)
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.22.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(viridis) 
## Loading required package: viridisLite
library(RColorBrewer)
library(ggVennDiagram)
## 
## Attaching package: 'ggVennDiagram'
## The following object is masked from 'package:tidyr':
## 
##     unite
library(dichromat)
library(survminer)
library(BioVenn)
library(Hmisc)
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(stringr)
library(forcats)
library(ggridges)
library(hrbrthemes)

The data and groupings:

Tables of Wilcoxon-tests with median and mean, separated on each stain

reactable(stat.test.CK17_m, striped = T, bordered = T, defaultPageSize = 16, showPageInfo = FALSE, showPageSizeOptions = TRUE, filterable = T)
reactable(stat.test.Muc5_m, striped = T, bordered = T, defaultPageSize = 16, showPageInfo = FALSE, showPageSizeOptions = TRUE, filterable = T)
reactable(stat.test.CA125_m, striped = T, bordered = T, defaultPageSize = 16, showPageInfo = FALSE, showPageSizeOptions = TRUE, filterable = T)
reactable(stat.test.CK5_m, striped = T, bordered = T, defaultPageSize = 16, showPageInfo = FALSE, showPageSizeOptions = TRUE, filterable = T)
reactable(stat.test.HMGA2_m, striped = T, bordered = T, defaultPageSize = 16, showPageInfo = FALSE, showPageSizeOptions = TRUE, filterable = T)
reactable(stat.test.CDX2_m, striped = T, bordered = T, defaultPageSize = 16, showPageInfo = FALSE, showPageSizeOptions = TRUE, filterable = T)
reactable(all_combined, striped = T, bordered = T, defaultPageSize = 16, showPageInfo = FALSE, showPageSizeOptions = TRUE, filterable = T)

Boxplots

Separate for each stain, faceted on each case.
Each dot = 1 ROI.

boxes_ck17 <- ggboxplot(all_CK17, x = "Tissue_compartment", y = "Tumor..Positive..",
                        color = "Tissue_compartment", palette = "Set2",
                        add = "jitter",
                        facet.by = "PKR", ncol = 4, short.panel.labs = TRUE) +
  labs(title = "Relative PDAC CK17+ cells in Lobule and Stroma", subtitle = "All cases") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.subtitle =element_text(hjust = 0.5, size = 12))+
  xlab(NULL) + ylab("% CK17+ tumor cells")  +
  ylim(-1, 120) +
  theme(plot.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"))

stat.test_CK17_1 <- stat.test_CK17 %>% add_xy_position(x = "Tissue_compartment")
Suppl_Fig_4b <- boxes_ck17 + 
  stat_pvalue_manual(label = "p.adj.signif",
                                stat.test_CK17_1, tip.length = 0) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) 

Suppl_Fig_4b

boxes_ck5 <- ggboxplot(all_CK5, x = "Tissue_compartment", y = "Tumor..Positive..",
                        color = "Tissue_compartment", palette = "Set2",
                        add = "jitter",
                        facet.by = "PKR", ncol = 4, short.panel.labs = TRUE) +
  labs(title = "Relative PDAC CK5+ cells in Lobule and Stroma", subtitle = "All cases") +
  xlab(NULL) + ylab("% CK5+ tumor cells") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.subtitle =element_text(hjust = 0.5, size = 12)) +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=12)) +
  ylim(-1, 120) +
  theme(plot.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"))

stat.test_CK5_1 <- stat.test_CK5 %>% add_xy_position(x = "Tissue_compartment")
Suppl_5b <- boxes_ck5 +
  stat_pvalue_manual(label = "p.adj.signif",
                                stat.test_CK5_1, tip.length = 0) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Suppl_5b

boxes_ca125 <- ggboxplot(all_CA125, x = "Tissue_compartment", y = "Tumor..Positive..",
                        color = "Tissue_compartment",
                        add = "jitter",
                        facet.by = "PKR", ncol = 4, short.panel.labs = TRUE) +
  labs(title = "Relative PDAC CA125+ cells in Lobule and Stroma", subtitle = "All cases") +
  xlab(NULL) + ylab("% CA125+ tumor cells") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.subtitle =element_text(hjust = 0.5, size = 12)) +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=12)) +
  ylim(-1, 120) +
  theme(plot.margin = margin(0.2, 0.2, 0.2, 0.2, "cm")) +
  scale_color_manual(values = c("#FC8D62", "#66C2A5"))

stat.test_CA125_1 <- stat.test_CA125 %>% add_xy_position(x = "Tissue_compartment")

Suppl_Fig_7b <- boxes_ca125 + stat_pvalue_manual(label = "p.adj.signif",
                                stat.test_CA125_1, tip.length = 0) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Suppl_Fig_7b

boxes_HMGA2 <- ggboxplot(all_HMGA2, x = "Tissue_compartment", y = "Tumor..Positive..",
                         color = "Tissue_compartment",
                         add = "jitter",
                         facet.by = "PKR", ncol = 3, short.panel.labs = TRUE) +
  labs(title = "Relative PDAC HMGA2+ cells in Lobule and Stroma", subtitle = "All cases") +
  xlab(NULL) + ylab("% HMGA2+ tumor cells") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.subtitle =element_text(hjust = 0.5, size = 12)) +
  theme(axis.text=element_text(size=12), axis.title=element_text(size=12)) +
  ylim(-1, 120) +
  theme(plot.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"))+
  scale_color_manual(values = c("#FC8D62", "#66C2A5"))

stat.test_HMGA2_1 <- stat.test_HMGA2 %>% add_xy_position(x = "Tissue_compartment")

Suppl_Fig_6b <- boxes_HMGA2 + stat_pvalue_manual(label = "p.adj.signif",
                                 stat.test_HMGA2_1, tip.length = 0) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Suppl_Fig_6b

boxes_Muc5 <- ggboxplot(all_Muc5, x = "Tissue_compartment", y = "Tumor..Positive..",
                       color = "Tissue_compartment",
                       add = "jitter",
                       facet.by = "PKR", ncol = 4, short.panel.labs = TRUE) +
  labs(title = "Relative PDAC Muc5+ cells in Lobule and Stroma", subtitle = "All cases") +
  xlab(NULL) + ylab("% Muc5+ tumor cells") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.subtitle =element_text(hjust = 0.5, size = 12)) +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=12)) +
  ylim(-1, 120) +
  theme(plot.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"))+
  scale_color_manual(values = c("#FC8D62", "#66C2A5"))

stat.test_Muc5_1 <- stat.test_Muc5 %>% add_xy_position(x = "Tissue_compartment")

Suppl_Fig_8b <- boxes_Muc5 + stat_pvalue_manual(label = "p.adj.signif",
                               stat.test_Muc5_1, tip.length = 0) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Suppl_Fig_8b

boxes_CDX2 <- ggboxplot(all_CDX2, x = "Tissue_compartment", y = "Tumor..Positive..",
                        color = "Tissue_compartment", palette = "Set2",
                        add = "jitter",
                        facet.by = "PKR", ncol = 4, short.panel.labs = TRUE) +
  labs(title = "Relative PDAC CDX2+ cells in Lobule and Stroma", subtitle = "All cases") +
  xlab(NULL) + ylab("% CDX2+ tumor cells") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), plot.subtitle =element_text(hjust = 0.5, size = 12)) +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=12)) +
  ylim(-1, 120) +
  theme(plot.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"))

stat.test_CDX2_1 <- stat.test_CDX2 %>% add_xy_position(x = "Tissue_compartment")

Suppl_Fig_9b <- boxes_CDX2 + stat_pvalue_manual(label = "p.adj.signif",
                                stat.test_CDX2_1, tip.length = 0) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Suppl_Fig_9b

Boxplots

Only cases significant difference between lobular - and stromal invasion.
Each dot = 1 ROI.

Correlation plot of the markers in IHC cohort, No imputation, enabled by using the data matrix all_combined_exl, and only using complete obervations.

head(cor_A)
##            CA125       CDX2       CK17        CK5      HMGA2       Muc5
## CA125  1.0000000 -0.2810753  0.8707811  0.4855878  0.8797215 -0.5688701
## CDX2  -0.2810753  1.0000000 -0.4758926 -0.2144643 -0.2169090  0.4749056
## CK17   0.8707811 -0.4758926  1.0000000  0.5885561  0.7987999 -0.5422339
## CK5    0.4855878 -0.2144643  0.5885561  1.0000000  0.2762256 -0.4209848
## HMGA2  0.8797215 -0.2169090  0.7987999  0.2762256  1.0000000 -0.5028824
## Muc5  -0.5688701  0.4749056 -0.5422339 -0.4209848 -0.5028824  1.0000000
head(testRes)
## $p
##              CA125         CDX2         CK17          CK5        HMGA2
## CA125 0.000000e+00 2.888282e-03 3.153729e-11 1.384304e-05 1.048558e-15
## CDX2  2.888282e-03 0.000000e+00 4.599406e-05 4.587324e-03 3.733383e-06
## CK17  3.153729e-11 4.599406e-05 0.000000e+00 1.907032e-09 2.601302e-09
## CK5   1.384304e-05 4.587324e-03 1.907032e-09 0.000000e+00 1.052497e-13
## HMGA2 1.048558e-15 3.733383e-06 2.601302e-09 1.052497e-13 0.000000e+00
## Muc5  1.673417e-01 1.340409e-03 3.937881e-16 4.357883e-04 3.447229e-09
##               Muc5
## CA125 1.673417e-01
## CDX2  1.340409e-03
## CK17  3.937881e-16
## CK5   4.357883e-04
## HMGA2 3.447229e-09
## Muc5  0.000000e+00
## 
## $lowCI
##            CA125        CDX2       CK17        CK5      HMGA2        Muc5
## CA125  1.0000000 -0.33160594  0.2518562  0.1916069  0.5673797 -0.17920192
## CDX2  -0.3316059  1.00000000 -0.3120629 -0.3583226 -0.4887262  0.06676752
## CK17   0.2518562 -0.31206294  1.0000000  0.2600504  0.2898819 -0.43224971
## CK5    0.1916069 -0.35832262  0.2600504  1.0000000  0.4311637 -0.34083972
## HMGA2  0.5673797 -0.48872617  0.2898819  0.4311637  1.0000000 -0.52379566
## Muc5  -0.1792019  0.06676752 -0.4322497 -0.3408397 -0.5237957  1.00000000
## 
## $uppCI
##             CA125        CDX2       CK17         CK5      HMGA2        Muc5
## CA125  1.00000000 -0.07139553  0.4381845  0.46826847  0.7753870  0.03141959
## CDX2  -0.07139553  1.00000000 -0.1131129 -0.06875649 -0.2150961  0.26928919
## CK17   0.43818452 -0.11311287  1.0000000  0.47748503  0.5263557 -0.27721735
## CK5    0.46826847 -0.06875649  0.4774850  1.00000000  0.6522895 -0.10113630
## HMGA2  0.77538701 -0.21509613  0.5263557  0.65228948  1.0000000 -0.28664106
## Muc5   0.03141959  0.26928919 -0.2772173 -0.10113630 -0.2866411  1.00000000

Draw correlation matrix with complex heatmap package

Suppl_Fig_3b <- draw(hp, annotation_legend_list = lgd_list, ht_gap = unit(1, "cm") )

Suppl_Fig_3b

#Sanity-check:
dim(matrix_A) # 488*6 = 2928
## [1] 488   6
#get nr of NA:
sum(is.na(matrix_A)) #823 
## [1] 823
2928-823
## [1] 2105
nrow(all_combined_exl)
## [1] 2105
#(dim counts) - NA = the quantified ROIs, as in the rownumbers for all_combined.

i_dim <- 2928-823
i_rows <- as.numeric(nrow(all_combined_exl))

identical(i_dim, i_rows) #2105 
## [1] TRUE

Heatmaps

Heatmap type A: Case id + ROIs across Stains.

pheatmap(t_m_matrix_A, fontsize_col = 6, annotation_col = m_A, annotation_colors = annot_colors_A, show_colnames = F, labels_col = "Case ID and Stain",  main = "Heatmap A: Case ID and ROIs over Stains. % Positive tumor cells", cluster_cols = T, cluster_rows = T)

Heatmap type B:

Fig_2e <- pheatmap(t_m_matrix_B, cluster_rows = T, cluster_cols = T, annotation_row = m_B, color=col.pal2, annotation_colors = annot_colors_B, show_rownames = F, cutree_cols = 2, main = "Heatmap B: Case ID and stains over ROI:s. Values: % Positive tumor cells") 

Fig_2e

#Sanity-check:
dim(t_m_matrix_B) # 159*18
## [1] 159  18
#get nr of NA:
sum(is.na(t_m_matrix_B)) #354
## [1] 354
#(dim counts) - NA = the quantified ROIs, as in the rownumbers for all_combined.

i_dim <- (159*18)-354
i_rows <- as.numeric(nrow(all_combined))

identical(i_dim, i_rows)
## [1] TRUE

Heatmap type C:

pheatmap(m_matrix_C, annotation_colors = annot_colors_Call, show_colnames = F, color=col.pal2, annotation_col = m_CC, annotation_row = m_Crows, main = "Heatmap C: Case ID  over ROI:s and stains. Values: % Positive tumor cells", cluster_rows = T, cluster_cols = T)

Heatmap C, averaged compartments

pheatmap(C_AVG_2, cutree_rows = 2, annotation_colors = annot_colors_Call, color=col.pal2, annotation_row = m_Crows, cutree_cols = 2, main = "Heatmap C: Case ID  over ROI:s and stains. Values: Average % Positive tumor cells", cluster_rows = T, cluster_cols = T)
## Warning: The input is a data frame, convert it to the matrix.

pheatmap(tmB_CK17, cutree_cols = 2, color=col.pal2, main = "Heatmap B for CK17: Case ID over ROI:s. Values: % Positive tumor cells")
## Warning: The input is a data frame, convert it to the matrix.

pheatmap(tmB_Muc5, cutree_cols = 2,color=col.pal2, main = "Heatmap B for Muc5: Case ID over ROI:s. Values: % Positive tumor cells")
## Warning: The input is a data frame, convert it to the matrix.

pheatmap(tmB_CK5, cutree_cols = 2, color=col.pal2,main = "Heatmap B for CK5: Case ID over ROI:s. Values: % Positive tumor cells")
## Warning: The input is a data frame, convert it to the matrix.

pheatmap(tmB_HMGA2, cutree_cols = 2, color=col.pal2,main = "Heatmap B for HMGA2: Case ID over ROI:s. Values: % Positive tumor cells")
## Warning: The input is a data frame, convert it to the matrix.

pheatmap(tmB_CA125, cutree_cols = 2, color=col.pal2,main = "Heatmap B for CA125: Case ID over ROI:s. Values: % Positive tumor cells")
## Warning: The input is a data frame, convert it to the matrix.

pheatmap(tmB_CDX2, cutree_cols = 2,color=col.pal2, main = "Heatmap B for CDX2: Case ID over ROI:s. Values: % Positive tumor cells")
## Warning: The input is a data frame, convert it to the matrix.

# -> only Muc5 have a complete separation
## Warning: Removed 54 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 54 rows containing missing values or values outside the scale range
## (`geom_point()`).

boxes_overall + stat_pvalue_manual(label = "p.adj.signif",
                                   stat.test_overallavg, tip.length = 0) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) # this is alright but does not show the paired (cases)
## Warning: Removed 54 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 54 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 54 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 54 rows containing missing values or values outside the scale range
## (`geom_point()`).

nlevels(a$PKR) #31, number of unique cases
## [1] 31
a_return_perstain <- a %>%
  group_by(Average_Stain) %>%
  summarise(N.instances = n())
a_return_perstain #62, the double of 31. one average per case and compartment 
## # A tibble: 6 × 2
##   Average_Stain N.instances
##   <fct>               <int>
## 1 CA125                  62
## 2 CDX2                   62
## 3 CK17                   62
## 4 CK5                    62
## 5 HMGA2                  62
## 6 Muc5                   62
a_return_perstainandcase <- a %>%
  group_by(Average_Stain, PKR) %>%
  summarise(N.instances = n())
## `summarise()` has grouped output by 'Average_Stain'. You can override using the
## `.groups` argument.
a_return_perstainandcase
## # A tibble: 186 × 3
## # Groups:   Average_Stain [6]
##    Average_Stain PKR   N.instances
##    <fct>         <fct>       <int>
##  1 CA125         1               2
##  2 CA125         11              2
##  3 CA125         12              2
##  4 CA125         13              2
##  5 CA125         14              2
##  6 CA125         15              2
##  7 CA125         16              2
##  8 CA125         17              2
##  9 CA125         18              2
## 10 CA125         2               2
## # ℹ 176 more rows
## Warning: Removed 42 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 42 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).

#no plasticity, visualising paired:
boxes_overall_paired_noplast <- ggplot(c_noplast, aes(x = Tissue_compartment, y = Avg_value)) + 
  geom_boxplot(aes(fill = Tissue_compartment, color = Tissue_compartment), alpha = 0.2) +
  geom_line(aes(group = PKR), colour = "gray") + 
  geom_point(size = 1.5, aes(fill = Tissue_compartment, color = Tissue_compartment)) + 
  facet_wrap(~ Average_Stain) +
  stat_pvalue_manual(label = "p.adj.signif",
                     stat.test_overallavg_noplast, tip.length = 0) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  theme_clean() +
  scale_color_brewer(palette= "Accent") +
  scale_fill_brewer(palette = "Accent") +
  labs(title = "Average expression of PDAC cells in Lobule and Stroma", subtitle = "Subset: Cases with no plasticity") +
  xlab(NULL) + ylab("Average + tumor cells") +
  theme(plot.margin = margin(1.1, 1.1, 1.1, 1.1, "cm")) +
  ylim(-1, 120)

boxes_overall_paired_noplast
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).

The simpson index

The index ranges between 0-1, where 1 = high diversity and 0 = no diversity

Matrix with the distribution scores: Distribution levels; below called “Intensity scores. This category represents the fraction of positive cells in each ROI:

sim_all_c6$`2` #Example of how the data matrix looks like, for just one case, PKR-2
##     CA125      CDX2      CK17       CK5     HMGA2      Muc5 
## 0.8104575 0.5032680 0.8104575 0.6601307 0.7124183 0.8366013
ggplot(simpsons_2, aes(x=PKR, y=Simpsons, 
                     color = Stain, fill = Stain, 
                     position = "dodge")) +
  geom_bar(stat = "identity", width=0.6, position = position_dodge2(width = 0.5, preserve = "single")) +
  scale_fill_brewer(palette = "PuOr") +
  scale_color_brewer(palette = "PuOr") + 
  geom_hline(yintercept = simpline,linetype = "longdash") +
  geom_text(aes(29.5, simpline, label = "3rd quantile", vjust = -1), col = "brown") +
  labs(title = "Simpson index of all cases and stains") +
  theme_clean() 

ggplot(simpsons_lgpval_2, aes(x=PKR, 
                            color = Stain, fill = Stain, 
                            position = "dodge")) +
  geom_bar(aes(y = p_val), stat = "identity", width=0.6, position = position_dodge2(width = 0.5, preserve = "single")) +
  geom_bar(aes(y = Simpsons), stat = "identity", width=0.6, position = position_dodge2(width = 0.5, preserve = "single")) +
  scale_fill_brewer(palette= "BrBG", type = "div") +
  scale_color_brewer(palette= "BrBG", type = "div") + 
  geom_hline(yintercept = log10(pline), linetype = "dashed", color = "black") +
  geom_text(aes(29.5, simpline, label = "3rd quantile", vjust = -1), size = 3, col = "black") +
  geom_text(aes(28.8, log10(pline), label = "p = 0.05", vjust = -1), size = 3, col = "black") +
  geom_hline(yintercept = simpline,linetype = "dashed", col = "black") +
  geom_hline(yintercept = 0,linetype = "solid", color = "black")+
  labs(title = "p-values of wilcoxon test and Simpsons for all cases and stains") +
  ylab(c("lg(adjusted p-values from wilxocon testing)               Simpson index of diversity")) +
  theme_clean()

# Scatterplot

x_line <- simpsons_pval_2$p_val
y_line <- simpsons_pval_2$Simpsons

scatter_simppval <- ggplot(simpsons_pval_2, aes(p_val, Simpsons, color = Stain), size = 4, alpha = 0.8)


Suppl_Fig_10a <- scatter_simppval + geom_point(aes(alpha = 0.7, size = 0.5)) +
  ggtitle("scatterplot of p-values and simpsons index: Suppl fig 10a") +
  geom_hline(yintercept = simpline,linetype = "dashed", col = "black") +
  geom_text(aes(0.75, simpline, label = "3rd quantile", vjust = -1), size = 4, col = "black") +
  geom_vline(xintercept = pline, linetype = "dashed", color = "black") +
  geom_text(aes(0.15, pline, label = "p = 0.05 ", vjust = -1), size = 4, col = "black") +
  scale_color_manual(values = 
                       c("CK17"="#e41a1c", "CK5"="#984ea3", "HMGA2" = "#ff7f00", "CA125" = "#ffff33", "CDX2" = "#4daf4a", "Muc5" = "#377eb8"))+
  theme_clean()

Suppl_Fig_10a
## Warning in geom_text(aes(0.75, simpline, label = "3rd quantile", vjust = -1), : All aesthetics have length 1, but the data has 159 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_text(aes(0.15, pline, label = "p = 0.05 ", vjust = -1), : All aesthetics have length 1, but the data has 159 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.

scatter_simppval + geom_point(aes(alpha = 0.7, size = 3)) +
  ggtitle("scatterplot of p-values and simpsons index") +
  geom_hline(yintercept = simpline,linetype = "dashed", col = "black") +
  geom_text(aes(0.75, simpline, label = "3rd quantile", vjust = -1), size = 4, col = "black") +
  geom_vline(xintercept = pline, linetype = "dashed", color = "black") +
  geom_text(aes(0.15, pline, label = "p = 0.05 ", vjust = -1), size = 4, col = "black") +
  scale_color_manual(values = 
                       c("CK17"="#e41a1c", "CK5"="#984ea3", "HMGA2" = "#ff7f00", "CA125" = "#ffff33", "CDX2" = "#4daf4a", "Muc5" = "#377eb8"))+
  geom_smooth(method = "lm", se = FALSE) +
  theme_clean()
## Warning in geom_text(aes(0.75, simpline, label = "3rd quantile", vjust = -1), : All aesthetics have length 1, but the data has 159 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## All aesthetics have length 1, but the data has 159 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## `geom_smooth()` using formula = 'y ~ x'

simpsons_pval_3 <- simpsons_pval_2
simpsons_pval_3$Event_simp_above <- factor(186)
simpsons_pval_3$Event_pval_below <- factor(186)

simpsons_pval_3 <- simpsons_pval_3 %>% mutate(Event_simp_above = ifelse(Simpsons >= simpline, 
                                                                  'true', .$Event_simp_above))
simpsons_pval_3 <- simpsons_pval_3 %>% mutate(Event_pval_below = ifelse(p_val <= pline, 
                                                                  'true', .$Event_pval_below))

simpsons_pval_3 <- simpsons_pval_3 %>% mutate(Event_simp_above = ifelse(Event_simp_above != "true", 
                                                                  'false', .$Event_simp_above))
simpsons_pval_3 <- simpsons_pval_3 %>% mutate(Event_pval_below = ifelse(Event_pval_below != "true", 
                                                                  'false', .$Event_pval_below))
simpsons_pval_4 <- simpsons_pval_3[, c(1, 3, 5:7)]  
simpsons_pval_4[is.na(simpsons_pval_4)] <- 'false'
simpsons_venn <- simpsons_pval_4[, c(3, 4, 5)]
simpsons_venn <- simpsons_venn  %>% mutate(across(starts_with("Event"), as.logical))

Vennfunc <- lapply(simpsons_venn[, c(2, 3)], function (x)
  which(x == 1))

Venn_simp_pval <- ggVennDiagram(Vennfunc)

Suppl_Fig_10b_old <- Venn_simp_pval +
  scale_fill_gradient(low = "#0072B2", high = "#CC79A7")
  scale_colour_gradient(low = "#0072B2", high = "#CC79A7")
## <ScaleContinuous>
##  Range:  
##  Limits:    0 --    1
  theme_cleantable()
## List of 7
##  $ axis.title.x: list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.title.y: list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.text.x : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.ticks.x: list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.ticks.y: list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.line.x : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.line.y : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE
Suppl_Fig_10b_old #color changed

# Venn, version where the size of Venn circles correspond to the numbers of cells in the respective group
# colors now correspond to the group identity instead of nr of ROIs in each group.
listed_z <- NULL

propVenn <- draw.venn(list_x = Vennfunc$Event_simp_above, list_y = Vennfunc$Event_pval_below, list_z = listed_z, title = "Venn diagram of overlapping measures of heterogeneity", xtitle = "Simpsons above threshold", ytitle = "p−value sign", x_c = "#F1BB7B", y_c = "#FD6467",  nrtype = "pct")
## [1] "x total: 40"
## [1] "y total: 48"
## [1] "z total: 0"
## [1] "x only: 21"
## [1] "y only: 29"
## [1] "z only: 0"
## [1] "x-y total overlap: 19"
## [1] "x-z total overlap: 0"
## [1] "y-z total overlap: 0"
## [1] "x-y only overlap: 19"
## [1] "x-z only overlap: 0"
## [1] "y-z only overlap: 0"
## [1] "x-y-z overlap: 0"

Suppl_Fig_10b_new <- propVenn
Suppl_Fig_10b_new
## $x
##  [1]  10  11  19  20  23  25  32  44  58  62  65  68  71  72  75  76  77  81  86
## [20]  92 108 112 113 114 116 117 120 122 125 126 127 128 130 132 133 134 136 138
## [39] 151 157
## 
## $y
##  [1]   1   5   7  10  11  30  36  39  42  48  55  60  62  63  65  67  68  70  71
## [20]  73  74  76  77  78  81  83  86  88  92  93  98 100 114 117 120 121 122 123
## [39] 126 128 132 135 136 137 140 147 152 154
## 
## $z
## NULL
## 
## $x_only
##  [1]  19  20  23  25  32  44  58  72  75 108 112 113 116 125 127 130 133 134 138
## [20] 151 157
## 
## $y_only
##  [1]   1   5   7  30  36  39  42  48  55  60  63  67  70  73  74  78  83  88  93
## [20]  98 100 121 123 135 137 140 147 152 154
## 
## $z_only
## NULL
## 
## $xy
##  [1]  10  11  62  65  68  71  76  77  81  86  92 114 117 120 122 126 128 132 136
## 
## $xz
## NULL
## 
## $yz
## NULL
## 
## $xy_only
##  [1]  10  11  62  65  68  71  76  77  81  86  92 114 117 120 122 126 128 132 136
## 
## $xz_only
## NULL
## 
## $yz_only
## NULL
## 
## $xyz
## NULL
simppval_return <- simpsons_venn %>%
  group_by(Event_pval_below, Event_simp_above) %>%
  summarise(N.events = n())
## `summarise()` has grouped output by 'Event_pval_below'. You can override using
## the `.groups` argument.
simppval_return 
## # A tibble: 4 × 3
## # Groups:   Event_pval_below [2]
##   Event_pval_below Event_simp_above N.events
##   <lgl>            <lgl>               <int>
## 1 FALSE            FALSE                  90
## 2 FALSE            TRUE                   21
## 3 TRUE             FALSE                  29
## 4 TRUE             TRUE                   19
simpson_return_perstain <- simpsons_pval_2 %>%
  group_by(Stain) %>%
  summarise(N.instances = n())

simpson_return_perstain
## # A tibble: 6 × 2
##   Stain N.instances
##   <fct>       <int>
## 1 CA125          30
## 2 CDX2           28
## 3 CK17           31
## 4 HMGA2          21
## 5 Muc5           31
## 6 CK5            18
sum(simpson_return_perstain$N.instances)
## [1] 159
ggplot(exlg_simpson_pval_2, aes(x=PKR, 
                              color = Stain, fill = Stain, 
                              position = "dodge")) +
  geom_bar(aes(y = p_val), stat = "identity", width=0.6, position = position_dodge2(width = 0.5, preserve = "single")) +
  geom_bar(aes(y = Simpsons), stat = "identity", width=0.6, position = position_dodge2(width = 0.5, preserve = "single")) +
  scale_fill_brewer(palette = "PuOr") +
  scale_color_brewer(palette = "PuOr") + 
  geom_hline(yintercept = log10(pline), linetype = "dashed", color = "brown") +
  geom_hline(yintercept = simpline,linetype = "dashed", color = "brown") +
  geom_hline(yintercept = 0,linetype = "solid", color = "black")+
  geom_text(aes(4.4, log10(pline), label = "p < 0.05", vjust = 2), col = "brown") +
  geom_text(aes(4.4, simpline, label = "3rd quantile", vjust = -1), col = "brown") +
  labs(title = "p-values of wilcoxon test and Simpsons") +
  ylab(c("log(adjusted p-values from wilxocon testing)                            Simpson index of diversity")) +
  ylim(-3.8, 1.5) +
  theme_clean()

ggplot(het, aes(x=PKR, position = "dodge", color = Group, fill = Group)) +
  geom_bar(aes(y = Heterogeneity_Cat), stat = "identity", width=0.6, position = position_dodge2(width = 0.5, preserve = "single")) +
  geom_bar(aes(y = -Plasticity_Cat), stat = "identity", width = 0.6, position = position_dodge2(width = 0.5, preserve = "single")) +
  scale_fill_brewer(palette = "Accent") +
  scale_color_brewer(palette = "Accent") + 
  geom_hline(yintercept = 0,linetype = "solid", color = "black")+
  labs(title = "Comparing heterogeneity measures") +
  ylab(c("Plasticity by Wilcox                            Heterogeneity by Simpsons")) +
  scale_y_continuous(breaks=c(-3, -2, -1, 1, 2, 3), label= c("Extensive", "Moderate", "Slight", "Slight", "Moderate", "Extensive")) +
  theme_clean()

## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

sp + 
  facet_grid(ROI ~ .)+ #sort on (basal like) expression in stroma.
  labs(title = "Ratio of classical and basal-like expressing cells in stromal and acinar ROIs", subtitle = "All cases") 

Ratio_joined + 
  facet_grid(ROI ~ .) +
  geom_point(aes(color=Plasticity, alpha = Group)) +
  scale_colour_manual(values = c(Extensive = "#4D004B", Moderate = "#8C6BB1", Slight = "#9EBCDA", None = "#808080", Classical = "#97C8FF", Basal = "#FF6242")) +
  scale_alpha_discrete(range=c(0, 1))+
  labs(title = "Ratio of all positive cells separated on compartment", subtitle = "All cases")  
## Warning: Using alpha for a discrete variable is not advised.

sp_noplast + 
  facet_grid(ROI ~ .) #sort on (basal like) expression in stroma.

Distribution of % positive cells for all cases

Basal stains in stroma. First, all pooled, followed by the individual stains.

distrib_stroma_basal

distrib_stroma_CK17

distrib_stroma_CK5

distrib_stroma_CA125 

distrib_stroma_HMGA2

Classical stains in stroma. First, all pooled, followed by the individual stains.

distrib_stroma_classical

distrib_stroma_Muc5

distrib_stroma_CDX2

Basal stains in lobular regions. First, all pooled, followed by the individual stains.

distrib_lobule_basal

distrib_lobule_CK17

distrib_lobule_CK5 

distrib_lobule_HMGA2 

distrib_lobule_CA125 

Classical stains in acinar. First, all pooled, followed by the individual stains.

distrib_lobule_classical

distrib_lobule_Muc5

distrib_lobule_CDX2

Density plots, separated on compartment using the datasets distrib_s and distrib_a

density_stroma <- distrib_s %>% 
  ggplot(aes(x = Tumor..Positive.., fill = Stain)) +
  geom_density(color="#e9ecef", alpha=0.8) +
  facet_grid(~Stain)

density_stroma

#As ridgeline
ridges_stroma <- distrib_s %>% 
  ggplot(aes(x = Tumor..Positive.., y = Stain, fill = Stain)) +
  geom_density_ridges(color="#e9ecef", alpha=0.8)

ridges_stroma
## Picking joint bandwidth of 5.49

#As ridgeline with color gradient
ridges_gradient_stroma <- ggplot(distrib_s, aes(x = `Tumor..Positive..`, y = `Stain`, fill = ..x..)) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis(name = "Temp. [F]", option = "E") +
  labs(title = 'Density of stains in stroma') +
  theme_clean() +
    theme(
      legend.position="none",
      panel.spacing = unit(0.1, "lines"),
      strip.text.x = element_text(size = 8)
    )+
  xlab("% Positive cells") + ylab(NULL)
ridges_gradient_stroma
## Warning: The dot-dot notation (`..x..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(x)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Picking joint bandwidth of 5.49

# Violin plots
violin_stroma_trimmed <- distrib_s %>% 
  ggplot(aes(x = Stain, y = Tumor..Positive.., fill = Stain)) +
  geom_violin(trim = TRUE) +
  theme_clean()

violin_stroma_trimmed

density_lobule <- distrib_a %>% 
  ggplot(aes(x = Tumor..Positive.., fill = Stain)) +
  geom_density(color="#e9ecef", alpha=0.8) +
  facet_grid(~Stain)

density_lobule

#As ridgeline
ridges_lobule <- distrib_a %>% 
  ggplot(aes(x = Tumor..Positive.., y = Stain, fill = Stain)) +
  geom_density_ridges(color="#e9ecef", alpha=0.8)

ridges_lobule
## Picking joint bandwidth of 4.77

#As ridgeline with color gradient
ridges_gradient_lobule <- ggplot(distrib_a, aes(x = `Tumor..Positive..`, y = `Stain`, fill = ..x..)) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis(name = "Temp. [F]", option = "E") +
  labs(title = 'Density of stains in Lobule') +
  theme_clean() +
    theme(
      legend.position="none",
      panel.spacing = unit(0.1, "lines"),
      strip.text.x = element_text(size = 8)
    ) +
  xlab("% Positive cells") + ylab(NULL)
ridges_gradient_lobule
## Picking joint bandwidth of 4.77

# Violin plots
violin_acinar_trimmed <- distrib_a %>% 
  ggplot(aes(x = Stain, y = Tumor..Positive.., fill = Stain)) +
  geom_violin(trim = TRUE) +
  theme_clean()

violin_acinar_trimmed

#Violin plots for both compartments
distrib_all <- all_combined[, c(1, 2, 3, 5, 11, 12)]

violin_both_untrimmed <- distrib_all %>% 
  ggplot(aes(x = Stain, y = Tumor..Positive.., color = Tissue_compartment)) +
  geom_violin(trim = FALSE, adjust = 2) +
  theme_calc()

violin_both_untrimmed

Plasticity_ID <- m_Crows
Plasticity_ID$PKR <- rownames(Plasticity_ID)
# 'Basal' and 'Classical' both go to the moderate category


Plasticity_ID %>% count(Plasticity)
##   Plasticity  n
## 1      Basal  2
## 2  Classical  1
## 3  Extensive  4
## 4   Moderate  7
## 5       None  7
## 6     Slight 10
Plasticity_ID <- Plasticity_ID %>% mutate(Plasticity = ifelse(grepl('Basal', .$Plasticity), 'Moderate', .$Plasticity))
Plasticity_ID <- Plasticity_ID %>% mutate(Plasticity = ifelse(grepl('Classical', .$Plasticity), 'Moderate', .$Plasticity))
Plasticity_ID %>% count(Plasticity)
##   Plasticity  n
## 1  Extensive  4
## 2   Moderate 10
## 3       None  7
## 4     Slight 10
# Fraction of any type of switching; 24/31 ≈ 77,4%

range(all_combined_exl$Num.Detections)
## [1] 299 864
# between 299-864 cells in ROIs.
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-apple-darwin20
## Running under: macOS 15.4.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Stockholm
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] hrbrthemes_0.8.7      ggridges_0.5.6        forcats_1.0.0        
##  [4] stringr_1.5.1         Hmisc_5.2-3           BioVenn_1.1.3        
##  [7] survminer_0.5.0       dichromat_2.0-0.1     ggVennDiagram_1.5.2  
## [10] RColorBrewer_1.1-3    viridis_0.6.5         viridisLite_0.4.2    
## [13] ComplexHeatmap_2.22.0 patchwork_1.3.0       corrplot_0.95        
## [16] reshape2_1.4.4        data.table_1.17.0     ggpubr_0.6.0         
## [19] ggplot2_3.5.2         ggthemes_5.1.0        reactable_0.4.4      
## [22] rstatix_0.7.2         dplyr_1.1.4           tidyr_1.3.1          
## 
## loaded via a namespace (and not attached):
##   [1] rstudioapi_0.17.1       jsonlite_2.0.0          shape_1.4.6.1          
##   [4] magrittr_2.0.3          farver_2.1.2            rmarkdown_2.29         
##   [7] GlobalOptions_0.1.2     zlibbioc_1.52.0         vctrs_0.6.5            
##  [10] Cairo_1.6-2             memoise_2.0.1           base64enc_0.1-3        
##  [13] htmltools_0.5.8.1       progress_1.2.3          plotrix_3.8-4          
##  [16] curl_6.2.2              broom_1.0.8             Formula_1.2-5          
##  [19] sass_0.4.10             bslib_0.9.0             htmlwidgets_1.6.4      
##  [22] plyr_1.8.9              httr2_1.1.2             zoo_1.8-14             
##  [25] cachem_1.1.0            lifecycle_1.0.4         iterators_1.0.14       
##  [28] pkgconfig_2.0.3         Matrix_1.7-3            R6_2.6.1               
##  [31] fastmap_1.2.0           GenomeInfoDbData_1.2.13 clue_0.3-66            
##  [34] digest_0.6.37           colorspace_2.1-1        AnnotationDbi_1.68.0   
##  [37] S4Vectors_0.44.0        crosstalk_1.2.1         textshaping_1.0.1      
##  [40] RSQLite_2.3.11          reactR_0.6.1            labeling_0.4.3         
##  [43] filelock_1.0.3          km.ci_0.5-6             mgcv_1.9-3             
##  [46] httr_1.4.7              abind_1.4-8             compiler_4.4.1         
##  [49] fontquiver_0.2.1        bit64_4.6.0-1           withr_3.0.2            
##  [52] doParallel_1.0.17       htmlTable_2.4.3         backports_1.5.0        
##  [55] carData_3.0-5           DBI_1.2.3               Rttf2pt1_1.3.12        
##  [58] ggsignif_0.6.4          biomaRt_2.62.1          rappdirs_0.3.3         
##  [61] rjson_0.2.23            ggsci_3.2.0             tools_4.4.1            
##  [64] foreign_0.8-90          extrafontdb_1.0         nnet_7.3-20            
##  [67] glue_1.8.0              nlme_3.1-168            checkmate_2.3.2        
##  [70] cluster_2.1.8.1         generics_0.1.4          gtable_0.3.6           
##  [73] KMsurv_0.1-5            hms_1.1.3               utf8_1.2.5             
##  [76] xml2_1.3.8              car_3.1-3               XVector_0.46.0         
##  [79] BiocGenerics_0.52.0     foreach_1.5.2           pillar_1.10.2          
##  [82] circlize_0.4.16         splines_4.4.1           BiocFileCache_2.14.0   
##  [85] lattice_0.22-7          survival_3.8-3          bit_4.6.0              
##  [88] tidyselect_1.2.1        fontLiberation_0.1.0    Biostrings_2.74.1      
##  [91] knitr_1.50              fontBitstreamVera_0.1.1 gridExtra_2.3          
##  [94] IRanges_2.40.1          svglite_2.2.0           stats4_4.4.1           
##  [97] xfun_0.52               Biobase_2.66.0          matrixStats_1.5.0      
## [100] stringi_1.8.7           UCSC.utils_1.2.0        yaml_2.3.10            
## [103] evaluate_1.0.3          codetools_0.2-20        extrafont_0.19         
## [106] gdtools_0.4.2           tibble_3.2.1            cli_3.6.5              
## [109] rpart_4.1.24            xtable_1.8-4            systemfonts_1.2.3      
## [112] jquerylib_0.1.4         survMisc_0.5.6          Rcpp_1.0.14            
## [115] GenomeInfoDb_1.42.3     dbplyr_2.5.0            png_0.1-8              
## [118] parallel_4.4.1          blob_1.2.4              prettyunits_1.2.0      
## [121] scales_1.4.0            purrr_1.0.4             crayon_1.5.3           
## [124] GetoptLong_1.0.5        rlang_1.1.6             KEGGREST_1.46.0